#Set Options
options(warn=-1)
options(scipen=999)
#Load Libraries
library(reshape)
library(plyr)
##
## Attaching package: 'plyr'
## The following objects are masked from 'package:reshape':
##
## rename, round_any
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following object is masked from 'package:reshape':
##
## rename
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(forcats)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:plyr':
##
## here
## The following object is masked from 'package:reshape':
##
## stamp
## The following object is masked from 'package:base':
##
## date
library(ggplot2)
library(xlsx)
library(sp)
library(rgdal)
## rgdal: version: 1.4-4, (SVN revision 833)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 2.4.2, released 2019/06/28
## Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/3.6/Resources/library/rgdal/gdal
## GDAL binary built with GEOS: FALSE
## Loaded PROJ.4 runtime: Rel. 5.2.0, September 15th, 2018, [PJ_VERSION: 520]
## Path to PROJ.4 shared files: /Library/Frameworks/R.framework/Versions/3.6/Resources/library/rgdal/proj
## Linking to sp version: 1.3-1
library(viridis)
## Loading required package: viridisLite
library(sjPlot)
library(sjmisc)
library(pscl)
## Classes and Methods for R developed in the
## Political Science Computational Laboratory
## Department of Political Science
## Stanford University
## Simon Jackman
## hurdle and zeroinfl functions by Achim Zeileis
library(margins)
library(effects)
## Loading required package: carData
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
#Link to source for cross-tabs
source("http://pcwww.liv.ac.uk/~william/R/crosstab.r")
# Read in cleaned ODIN data from Pennsylvania [put website]
odin_pa <- read.csv("/Users/gbarboza/Desktop/ODIN_PA/Overdose_Information_Network_Data_CY_January_2018_-_Current_Monthly_County_State_Police.csv", na.strings=c("","NA"))
#Do some data cleaning
#Race and Ethnicity -------------------------------------
odin_pa$hisp<-ifelse(odin_pa$Ethnicity.Desc=='Hispanic',1,0)
odin_pa$race_eth[odin_pa$hisp == 0 & odin_pa$Race=="White"]<-"NHWhite"
odin_pa$race_eth[odin_pa$hisp == 0 & odin_pa$Race=="Black"]<-"NHBlack"
odin_pa$race_eth[odin_pa$hisp == 1 ]<-"Hisp"
odin_pa$black<-ifelse(odin_pa$Race=='Black',1,0)
odin_pa$white<-ifelse(odin_pa$Race=='White',1,0)
#Response time-------------------------------------------
odin_pa$Response.Time.Desc[odin_pa$Response.Time.Desc == "DID NOT WORK"]<-NA
odin_pa$Response.Time.Desc <- factor(odin_pa$Response.Time.Desc)
#Date & Time__________________________________
odin_pa$DT <- mdy_hms(odin_pa$datetime)
#Insert hour, month and year as separate variables
odin_pa$hour <- hour(odin_pa$DT)
odin_pa$month <- month(odin_pa$DT)
odin_pa$year <- year(odin_pa$DT)
#Ages into numeric var______________________________
odin_pa <- odin_pa %>%
mutate(age = fct_recode(Age.Range,
"1" = "0 - 9" ,
"1" = "10-14" ,
"1"= "15 - 19",
"2" = "20 - 24",
"2" = "25 - 29",
"3"= "30 - 39" ,
"4" = "40 - 49" ,
"5" = "50 - 59" ,
"6" = "60 - 69" ,
"6" = "70 - 79" ,
"6" = "80 - *"
)
)
#Revival Action ___________________________________
odin_pa <- odin_pa %>%
mutate(revived = fct_recode(Revive.Action.Desc,
"Arrest" = "ARREST" ,
"Hospital" = "HOSPITAL CONSCIOUS" ,
"Hospital"="HOSPITAL UNCONSCIOUS",
"Refused" = "REFUSED TRANSPORT",
"Other" = "RELEASED",
"Referred to treatment"= "TRANSPORTED TO TREATMENT" ,
"Referred to treatment" = "VERBALLY REFERRED TO TREATMENT" ,
"Other" = "OTHER" ,
"NA" = "DON'T KNOW"
)
)
odin_pa <- odin_pa %>%
mutate(drugs = fct_recode(Susp.OD.Drug.Desc,
"Alcohol" = "ALCOHOL" ,
"Heroin" = "HEROIN" ,
"Fentanyl"="FENTANYL",
"Fentanyl" = "FENTANYL ANALOG/OTHER SYNTHETIC OPIOID",
"Pharma" = "PHARMACEUTICAL STIMULANT",
"Pharma"= "PHARMACEUTICAL OTHER" ,
"Pharma" = "PHARMACEUTICAL OPIOID" ,
"Benzos" = "BENZODIAZEPINES (I.E.VALIUM, XANAX, ATIVAN, ETC)" ,
"Methadone" = "METHADONE",
"Suboxone" = "SUBOXONE",
"MJ" = "MARIJUANA",
"MJ" = "SYNTHETIC MARIJUANA",
"Meth" = "METHAMPHETAMINE",
"Carf" = "CARFENTANIL",
"Salts" = "BATH SALTS",
"unknown" = "unknown"
)
)
odin_pa$heroin<-ifelse(odin_pa$drugs=='Heroin',1,0)
odin_pa$fentanyl<-ifelse(odin_pa$drugs=='Fentanyl',1,0)
odin_pa$pharma<-ifelse(odin_pa$drugs=='Pharma',1,0)
#Before any analysis let's make sure we have the right selections
#In order to know how many unique incidents there were, we need to consider that each incident could have multiple victims.
#The dataset does not take that into consideration, so we need to apply some filters
#The following dataset provides the count of all drugs involved with the incident using the
#key that identifies a unique record containing the details of a single incident (identified by date, time and location)
unique_incidents <- odin_pa[!duplicated(odin_pa[,c('Incident.ID')]),] # of the 14,400 total cases, 10,290 were unique incidents
duplicated_incidents <- odin_pa[duplicated(odin_pa[,c('Incident.ID')]),] # 4,110 were not unique, there are duplicate victims and/or multiple drugs suspected of causing the OD
#Below we get all incidents where each line represents a unique victim regardless of how many drugs were suspected
#in causing the OD
#The following dataset contains incidents with unique victim IDs (i.e. each incident may have had more than 1 victim) using the
#key that identifies a unique record for a victim.
#According to the result, there were 10,465 unique incidents with unique victim IDs
unique_incidents_victims <- odin_pa[!duplicated(odin_pa[,c('Incident.ID','Victim.ID')]),]
duplicated_incidents_victims <- odin_pa[duplicated(odin_pa[,c('Incident.ID','Victim.ID')]),]
##Finally, this dataset selects incidents with multiple distinct victims (note,
#some cases have multiple victimizations of the same person, this code excludes individuals with repeat victimizations
#as it is not necessary to include them (i.e. their characteristics do not change))
multiple_unique_victims <-unique_incidents_victims %>%
dplyr::group_by(Incident.ID) %>%
dplyr::add_count(Incident.ID) %>%
dplyr::distinct(Incident.ID, .keep_all = TRUE)
mean(multiple_unique_victims$n) #average number of persons per incident
## [1] 1.017007
table(multiple_unique_victims$n) #distribution of number of unique victims of drug OD per incident
##
## 1 2 3 4 9
## 10132 149 6 2 1
aggregate(n ~ race_eth, data=multiple_unique_victims, mean)
## race_eth n
## 1 Hisp 1.010187
## 2 NHBlack 1.037924
## 3 NHWhite 1.018672
aggregate(Dose.Count ~ race_eth, data=multiple_unique_victims, mean)
## race_eth Dose.Count
## 1 Hisp 0.959253
## 2 NHBlack 1.051896
## 3 NHWhite 1.015674
aggregate(Dose.Unit ~ race_eth, data=multiple_unique_victims, mean)
## race_eth Dose.Unit
## 1 Hisp 1.738540
## 2 NHBlack 2.011976
## 3 NHWhite 1.741584
#incidents and victims with each line representing a different drug suspected of causing the OD
#this dataset is the number of incidents with victims that had multiple drugs suspected of causing the OD using the
#unique identifier for each suspected drug causing the overdose.
unique_incidents_victims_drugs <- odin_pa[!duplicated(odin_pa[,c('Incident.ID','Victim.ID', 'Victim.OD.Drug.ID')]),]
duplicated_incidents_victims_drugs <- odin_pa[duplicated(odin_pa[,c('Incident.ID','Victim.ID', 'Victim.OD.Drug.ID')]),]
multiple_unique_victims_polydrugs <-unique_incidents_victims_drugs %>% #this dataset selects incidents with multiple distinct victims and counts
#how many drugs were suspected in causing their overdose
dplyr::group_by(Incident.ID) %>%
dplyr::add_count(Victim.ID) %>%
dplyr::distinct(Victim.ID, .keep_all = TRUE)
multiple_unique_victims_polydrugs$polydrug<-ifelse(multiple_unique_victims_polydrugs$n >1,1,0)
newdata <- multiple_unique_victims_polydrugs[ which(multiple_unique_victims_polydrugs$race_eth=='NHWhite'), ]
crosstab(newdata, row.vars = "Naloxone_Admin", col.vars = "polydrug", type = "c")
## polydrug 0 1
## Naloxone_Admin
## 0 32.37 39.64
## 1 67.63 60.36
## Sum 100.00 100.00
chisq.test(newdata$Naloxone_Admin, newdata$polydrug, correct = FALSE)
##
## Pearson's Chi-squared test
##
## data: newdata$Naloxone_Admin and newdata$polydrug
## X-squared = 36.23, df = 1, p-value = 0.000000001754
multiple_unique_victims$missing<-ifelse(is.na(multiple_unique_victims$Survived),1,0)
#same as unique incidents
mean(multiple_unique_victims_polydrugs$n)
## [1] 1.330721
table(multiple_unique_victims_polydrugs$n)
##
## 1 2 3 4 5 6 7
## 7766 2148 401 107 27 14 2
aggregate(n ~ race_eth, data=multiple_unique_victims_polydrugs, mean)
## race_eth n
## 1 Hisp 1.332209
## 2 NHBlack 1.360308
## 3 NHWhite 1.379449
#same result using the following code
polydruguse <- aggregate(Victim.OD.Drug.ID ~ Incident.ID+Victim.ID, data = unique_incidents_victims_drugs, FUN = length)
mean(polydruguse$Victim.OD.Drug.ID)
## [1] 1.330721
table(polydruguse$Victim.OD.Drug.ID)
##
## 1 2 3 4 5 6 7
## 7766 2148 401 107 27 14 2
##########Descriptive statistics
table(multiple_unique_victims_polydrugs$race_eth)
##
## Hisp NHBlack NHWhite
## 593 519 7474
#Chi-Square of indep vars by race
crosstab(multiple_unique_victims_polydrugs, row.vars = "age", col.vars = "race_eth", type = "c")
## race_eth Hisp NHBlack NHWhite
## age
## 1 3.71 5.01 2.09
## 2 30.86 32.37 35.88
## 3 31.70 28.13 37.33
## 4 22.43 13.10 14.84
## 5 9.11 14.26 7.60
## 6 2.19 7.13 2.26
## Sum 100.00 100.00 100.00
chisq.test(multiple_unique_victims_polydrugs$age, multiple_unique_victims_polydrugs$race_eth, correct = FALSE)
##
## Pearson's Chi-squared test
##
## data: multiple_unique_victims_polydrugs$age and multiple_unique_victims_polydrugs$race_eth
## X-squared = 137.79, df = 10, p-value < 0.00000000000000022
#Descriptives on dates
odin_pa %>%
ggplot(aes(wday(DT, label = TRUE))) +
geom_bar(fill = "midnightblue", alpha = 0.8) +
labs(x = NULL,
y = "Number of events")

race_hour_OD <- unique_incidents_victims %>% dplyr::group_by(race = race_eth, hour = hour(DT)) %>%
dplyr::count() %>% tidyr::spread(race, n)
race_month_OD <- unique_incidents_victims %>% dplyr::group_by(race = race_eth, month = month(DT)) %>%
dplyr::count() %>% tidyr::spread(race, n)
#save to excel for better plotting, I did not like any options here
#write.xlsx(data.frame(race_hour_OD), "race_hour_OD_excel.xlsx")
#write.xlsx(data.frame(race_month_OD), "race_month_OD_excel.xlsx")
DRUG_OD <- unique_incidents_victims_drugs %>% dplyr::group_by(ID = Incident.ID, drug = Susp.OD.Drug.Desc) %>%
dplyr::count() %>% tidyr::spread(drug, n)
d<- unique_incidents_victims %>% #max od in a single day
select(DT, race_eth) %>%
dplyr::group_by(race = race_eth, year = year(DT), month = month(DT), day = day(DT)) %>%
dplyr::count() %>% tidyr::spread(race, n)
d<- as.data.frame(d)
write.xlsx(d, "d.xlsx")
d<- unique_incidents_victims %>%
select(DT, race_eth) %>%
dplyr::group_by(race = race_eth, hour = hour(DT)) %>%
dplyr::count() %>%
filter(hour>0) %>% tidyr::spread(race, n)
colors <- c("Hisp" = "blue", "NHBlack" = "red", "NHWhite" = "green")
ggplot(d, aes(x=hour)) +
geom_line(aes(hour, Hisp*50/3, color = "Hisp"), size=.8, linetype="dashed", size=.5) +
geom_point(aes(hour, Hisp*50/3, color = "Hisp"),size=1) +
geom_line(aes(hour, NHBlack*50/3, color = "NHBlack"), linetype="dashed", size=.5) +
geom_point(aes(hour, NHBlack*50/3, color = "NHBlack"),size=1) +
geom_line(aes(hour, NHWhite, color = "NHWhite"), linetype="dashed", size=.5 ) +
geom_point(aes(hour, NHWhite, color = "NHWhite"),size=1) +
scale_y_continuous(
name = "White",
sec.axis = sec_axis(~ . *3/50 , name = "Non-White")) +
theme(panel.background = element_rect(fill= "white", color = "black",
linetype = "solid"), legend.position="top") +
labs(y = "Number of Overdose Responses",
x = "Hour of Day") + scale_colour_manual(values = colors) + scale_color_discrete(name = "Race/Ethnicity",
labels=c("Latino", "Non-Hispanic Black", "Non-Hispanic White")) +
ggtitle("Hourly Distribution of Drug Overdose Responses by Race/Ethnicity")
## Scale for 'colour' is already present. Adding another scale for
## 'colour', which will replace the existing scale.

d<- unique_incidents_victims %>%
select(DT, race_eth) %>%
dplyr::group_by(race = race_eth, month = month(DT)) %>%
dplyr::count() %>%
tidyr::spread(race, n)
ggplot(d, aes(x=month)) +
geom_line(aes(month, Hisp*50/3, color = "Hisp"), size=.8, linetype="dashed", size=.5) +
geom_point(aes(month, Hisp*50/3, color = "Hisp"),size=1) +
geom_line(aes(month, NHBlack*50/3, color = "NHBlack"), linetype="dashed", size=.5) +
geom_point(aes(month, NHBlack*50/3, color = "NHBlack"),size=1) +
geom_line(aes(month, NHWhite, color = "NHWhite"), linetype="dashed", size=.5 ) +
geom_point(aes(month, NHWhite, color = "NHWhite"),size=1) +
scale_y_continuous(
name = "White",
sec.axis = sec_axis(~ . *3/50 , name = "Non-White")) +
theme(panel.background = element_rect(fill= "white", color = "black",
linetype = "solid"), legend.position="top") +
labs(y = "Number of Overdose Responses",
x = "Month of Year") + scale_colour_manual(values = colors) + scale_color_discrete(name = "Race/Ethnicity",
labels=c("Latino", "Non-Hispanic Black", "Non-Hispanic White")) + scale_x_continuous(breaks=c(1:12)) +
ggtitle("Monthly Distribution of Drug Overdose Responses by Race/Ethnicity")
## Scale for 'colour' is already present. Adding another scale for
## 'colour', which will replace the existing scale.

round(prop.table(table(unique_incidents_victims$race_eth)),3)
##
## Hisp NHBlack NHWhite
## 0.069 0.060 0.870
round(prop.table(table(unique_incidents_victims$age)),3)
##
## 1 2 3 4 5 6
## 0.023 0.346 0.364 0.153 0.086 0.028
unique_incidents_victims %>%
mutate(wday = wday(DT, label = TRUE)) %>%
ggplot(aes(x = wday)) +
geom_bar()

unique_incidents_victims %>% #This is interesting but useless
mutate(minute = minute(DT)) %>%
group_by(minute) %>%
summarise(
avg_dose = mean(Dose.Unit, na.rm = TRUE),
n = n()) %>%
ggplot(aes(minute, avg_dose)) +
geom_line()

unique_incidents_victims %>%
filter(race_eth=="NHWhite") %>%
mutate(month = month(DT, label = TRUE)) %>%
ggplot(aes(x = month)) +
geom_bar()

unique_incidents_victims %>%
filter(race_eth=="NHBlack") %>%
mutate(month = month(DT, label = TRUE)) %>%
ggplot(aes(x = month)) +
geom_bar()

unique_incidents_victims %>%
filter(race_eth=="Hisp") %>%
mutate(month = month(DT, label = TRUE)) %>%
ggplot(aes(x = month)) +
geom_bar()

unique_incidents_victims %>%
filter(race_eth=="NHWhite") %>%
count(week = floor_date(DT, "week")) %>%
ggplot(aes(week, n)) +
geom_line()

unique_incidents_victims %>%
filter(race_eth=="NHBlack") %>%
count(week = floor_date(DT, "week")) %>%
ggplot(aes(week, n)) +
geom_line()

unique_incidents_victims %>%
filter(race_eth=="Hisp") %>%
count(week = floor_date(DT, "week")) %>%
ggplot(aes(week, n)) +
geom_line()

unique_incidents_victims %>%
filter(race_eth=="NHWhite") %>%
mutate(hour = hour(DT)) %>%
ggplot(aes(x = hour)) +
geom_bar()

unique_incidents_victims %>%
filter(race_eth=="NHBlack") %>%
mutate(hour = hour(DT)) %>%
ggplot(aes(x = hour)) +
geom_bar()

unique_incidents_victims %>%
filter(race_eth=="Hisp") %>%
mutate(hour = hour(DT)) %>%
ggplot(aes(x = hour)) +
geom_bar()

unique_incidents_victims$age_num <- as.numeric(as.character(unique_incidents_victims$age))
mod1 <- glm(Survived ~ Naloxone_Admin + age_num + hour + month + year +
heroin + pharma + fentanyl + Gender.Desc + white + black + black*Naloxone_Admin + white*Naloxone_Admin +
Incident.County.Latitude+Victim.County.Longitude , family='binomial', data=unique_incidents_victims)
summary(mod1)
##
## Call:
## glm(formula = Survived ~ Naloxone_Admin + age_num + hour + month +
## year + heroin + pharma + fentanyl + Gender.Desc + white +
## black + black * Naloxone_Admin + white * Naloxone_Admin +
## Incident.County.Latitude + Victim.County.Longitude, family = "binomial",
## data = unique_incidents_victims)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.7734 0.2776 0.3577 0.5422 1.7904
##
## Coefficients:
## Estimate Std. Error z value
## (Intercept) -207.477251 131.588021 -1.577
## Naloxone_Admin 1.055234 1.126053 0.937
## age_num -0.252432 0.028354 -8.903
## hour 0.028220 0.004284 6.587
## month 0.009369 0.010400 0.901
## year 0.102870 0.065157 1.579
## heroin 0.134111 0.083995 1.597
## pharma 0.727335 0.181632 4.004
## fentanyl -0.842137 0.095979 -8.774
## Gender.DescMale -0.209888 0.070278 -2.987
## white -0.210459 1.008445 -0.209
## black 0.327980 1.022947 0.321
## Incident.County.Latitude -0.010577 0.064181 -0.165
## Victim.County.Longitude -0.018659 0.017285 -1.080
## Naloxone_Admin:black 0.956021 1.158564 0.825
## Naloxone_Admin:white 1.121666 1.128115 0.994
## Pr(>|z|)
## (Intercept) 0.11486
## Naloxone_Admin 0.34870
## age_num < 0.0000000000000002 ***
## hour 0.0000000000448 ***
## month 0.36766
## year 0.11438
## heroin 0.11034
## pharma 0.0000621629789 ***
## fentanyl < 0.0000000000000002 ***
## Gender.DescMale 0.00282 **
## white 0.83469
## black 0.74850
## Incident.County.Latitude 0.86910
## Victim.County.Longitude 0.28035
## Naloxone_Admin:black 0.40927
## Naloxone_Admin:white 0.32009
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 7960.5 on 8286 degrees of freedom
## Residual deviance: 6367.5 on 8271 degrees of freedom
## (2178 observations deleted due to missingness)
## AIC: 6399.5
##
## Number of Fisher Scoring iterations: 5
confint(mod1)
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) -465.80591503 50.09333570
## Naloxone_Admin -1.25817601 3.39679880
## age_num -0.30805348 -0.19688534
## hour 0.01982626 0.03662254
## month -0.01101711 0.02975806
## year -0.02466898 0.23078418
## heroin -0.03131306 0.29801687
## pharma 0.37887933 1.09206857
## fentanyl -1.03098968 -0.65467684
## Gender.DescMale -0.34828156 -0.07274236
## white -2.34468500 1.92284355
## black -1.82911905 2.48437297
## Incident.County.Latitude -0.13583779 0.11580304
## Victim.County.Longitude -0.05260415 0.01516131
## Naloxone_Admin:black -1.43941842 3.32526055
## Naloxone_Admin:white -1.22326216 3.43861042
round(exp(coef(mod1)-1)*100,4)
## (Intercept) Naloxone_Admin age_num
## 0.0000 105.6788 28.5809
## hour month year
## 37.8409 37.1342 40.7738
## heroin pharma fentanyl
## 42.0677 76.1348 15.8478
## Gender.DescMale white black
## 29.8231 29.8060 51.0676
## Incident.County.Latitude Victim.County.Longitude Naloxone_Admin:black
## 36.4009 36.1079 95.6974
## Naloxone_Admin:white
## 112.9376
pR2(mod1)
## llh llhNull G2 McFadden r2ML
## -3183.7565419 -4543.3366670 2719.1602503 0.2992471 0.2797260
## r2CU
## 0.4200337
length(residuals(mod1)) #get number of observations
## [1] 8287
mod1_margins <- margins(mod1)
cplot(mod1, "age_num", what="predict", draw=TRUE)
## xvals yvals upper lower
## 1 1.000000 0.9114779 0.9234533 0.8995026
## 2 1.208333 0.9071418 0.9188539 0.8954297
## 3 1.416667 0.9026160 0.9140445 0.8911875
## 4 1.625000 0.8978944 0.9090268 0.8867621
## 5 1.833333 0.8929711 0.9038062 0.8821360
## 6 2.041667 0.8878400 0.8983921 0.8772879
## 7 2.250000 0.8824953 0.8927990 0.8721915
## 8 2.458333 0.8769312 0.8870470 0.8668154
## 9 2.666667 0.8711421 0.8811611 0.8611231
## 10 2.875000 0.8651226 0.8751703 0.8550749
## 11 3.083333 0.8588674 0.8691038 0.8486310
## 12 3.291667 0.8523717 0.8629869 0.8417564
## 13 3.500000 0.8456307 0.8568368 0.8344245
## 14 3.708333 0.8386401 0.8506606 0.8266197
## 15 3.916667 0.8313962 0.8444556 0.8183368
## 16 4.125000 0.8238953 0.8382122 0.8095783
## 17 4.333333 0.8161345 0.8319171 0.8003518
## 18 4.541667 0.8081113 0.8255559 0.7906668
## 19 4.750000 0.7998239 0.8191145 0.7805334
## 20 4.958333 0.7912711 0.8125804 0.7699618

allEffects(mod1)
## model: Survived ~ Naloxone_Admin + age_num + hour + month + year + heroin +
## pharma + fentanyl + Gender.Desc + white + black + black *
## Naloxone_Admin + white * Naloxone_Admin + Incident.County.Latitude +
## Victim.County.Longitude
##
## age_num effect
## age_num
## 1 2 4 5 6
## 0.9165814 0.8951394 0.8374646 0.8001211 0.7566903
##
## hour effect
## hour
## 0 5.8 12 17 23
## 0.8232485 0.8458206 0.8672840 0.8827005 0.8991283
##
## month effect
## month
## 1 4 6 9 10
## 0.8627379 0.8660326 0.8681918 0.8713752 0.8724216
##
## year effect
## year
## 2018 2018.2 2018.5 2018.8 2019
## 0.8635436 0.8659499 0.8694920 0.8729542 0.8752185
##
## heroin effect
## heroin
## 0 0.2 0.5 0.8 1
## 0.8597173 0.8629211 0.8676111 0.8721644 0.8751252
##
## pharma effect
## pharma
## 0 0.2 0.5 0.8 1
## 0.8653713 0.8814365 0.9024110 0.9200117 0.9300835
##
## fentanyl effect
## fentanyl
## 0 0.2 0.5 0.8 1
## 0.8859711 0.8678183 0.8360551 0.7984319 0.7699617
##
## Gender.Desc effect
## Gender.Desc
## Female Male
## 0.8845270 0.8612999
##
## Incident.County.Latitude effect
## Incident.County.Latitude
## 39.9 40.4 40.9 41.5 42
## 0.8695842 0.8689832 0.8683800 0.8676529 0.8670445
##
## Victim.County.Longitude effect
## Victim.County.Longitude
## -80 -79 -78 -76 -75
## 0.8745134 0.8724514 0.8703605 0.8660912 0.8639123
##
## Naloxone_Admin*black effect
## black
## Naloxone_Admin 0 0.2 0.5 0.8 1
## 0 0.6086178 0.6241269 0.6469127 0.6690496 0.6834096
## 0.2 0.7026141 0.7238448 0.7538717 0.7816187 0.7988241
## 0.5 0.8156553 0.8386723 0.8687760 0.8939725 0.9083102
## 0.8 0.8923133 0.9115856 0.9346851 0.9520669 0.9611113
## 1 0.9264137 0.9421147 0.9598773 0.9723493 0.9784767
##
## Naloxone_Admin*white effect
## white
## Naloxone_Admin 0 0.2 0.5 0.8 1
## 0 0.6592020 0.6496835 0.6351815 0.6204299 0.6104684
## 0.2 0.7077904 0.7083640 0.7092231 0.7100807 0.7106516
## 0.5 0.7724297 0.7845120 0.8017501 0.8179293 0.8281334
## 0.8 0.8262796 0.8451243 0.8702220 0.8917734 0.9043361
## 1 0.8562422 0.8772524 0.9037867 0.9250751 0.9367683
plot(effect("white", mod1))
## NOTE: white is not a high-order term in the model

plot(effect("age_num", mod1))

mod_hisp <- glm(Survived ~ Naloxone_Admin * Gender.Desc * hisp, family='binomial', data=unique_incidents_victims)
no_naloxone_male = data.frame("hisp"=1, "Naloxone_Admin"=0, "Gender.Desc" = "Male")
naloxone_male = data.frame("hisp"=1, "Naloxone_Admin"=1, "Gender.Desc" = "Male")
no_naloxone_female = data.frame("hisp"=1, "Naloxone_Admin"=0, "Gender.Desc" = "Female")
naloxone_female = data.frame("hisp"=1, "Naloxone_Admin"=1, "Gender.Desc" = "Female")
predict(mod_hisp, no_naloxone_male, type = "response")
## 1
## 0.734375
predict(mod_hisp, naloxone_male, type = "response")
## 1
## 0.9463722
predict(mod_hisp, no_naloxone_female, type = "response")
## 1
## 0.7540984
predict(mod_hisp, naloxone_female, type = "response")
## 1
## 0.8852459
mod_white <- glm(Survived ~ Naloxone_Admin * Gender.Desc * white , family='binomial', data=unique_incidents_victims)
no_naloxone_male = data.frame("white"=1, "Naloxone_Admin"=0, "Gender.Desc" = "Male")
naloxone_male = data.frame("white"=1, "Naloxone_Admin"=1, "Gender.Desc" = "Male")
no_naloxone_female = data.frame("white"=1, "Naloxone_Admin"=0, "Gender.Desc" = "Female")
naloxone_female = data.frame("white"=1, "Naloxone_Admin"=1, "Gender.Desc" = "Female")
predict(mod_white, no_naloxone_male, type = "response")
## 1
## 0.5851962
predict(mod_white, naloxone_male, type = "response")
## 1
## 0.9269917
predict(mod_white, no_naloxone_female, type = "response")
## 1
## 0.6568421
predict(mod_white, naloxone_female, type = "response")
## 1
## 0.9330986
#Note, for plots the order in which variables enter matters
mod_black <- glm(Survived ~ Naloxone_Admin * Gender.Desc * black , family='binomial', data=unique_incidents_victims)
no_naloxone_male = data.frame("black"=1, "Naloxone_Admin"=0, "Gender.Desc" = "Male")
naloxone_male = data.frame("black"=1, "Naloxone_Admin"=1, "Gender.Desc" = "Male")
no_naloxone_female = data.frame("black"=1, "Naloxone_Admin"=0, "Gender.Desc" = "Female")
naloxone_female = data.frame("black"=1, "Naloxone_Admin"=1, "Gender.Desc" = "Female")
predict(mod_black, no_naloxone_male, type = "response")
## 1
## 0.7037037
predict(mod_black, naloxone_male, type = "response")
## 1
## 0.9513514
predict(mod_black, no_naloxone_female, type = "response")
## 1
## 0.7727273
predict(mod_black, naloxone_female, type = "response")
## 1
## 0.9237288
theme_set(theme_sjplot())
plot_model(mod_white, type = "pred", terms = c("Naloxone_Admin", "white"))

plot_model(mod_black, type = "pred", terms = c("Naloxone_Admin", "black"))

plot_model(mod_hisp, type = "pred", terms = c("Naloxone_Admin", "hisp"))

plot_model(mod_hisp, type = "int")

plot_model(mod_white, type = "int")

plot_model(mod_black, type = "int")

#######################
x<-aggregate(Incident.ID ~ Incident.County.Name, FUN = length, data = unique_incidents_victims)
x_black<-aggregate(Incident.ID ~ Incident.County.Name, FUN = length, data = subset(unique_incidents_victims, black==1))
x_white<-aggregate(Incident.ID ~ Incident.County.Name, FUN = length, data = subset(unique_incidents_victims, white==1))
x_hisp<-aggregate(Incident.ID ~ Incident.County.Name, FUN = length, data = subset(unique_incidents_victims, hisp==1))
pop_dat <- read.csv("/Users/gbarboza/Desktop/ODIN_PA/pop.csv")
x <- merge(x_black, x, by = "Incident.County.Name", all.y = TRUE)
x <- merge(x_white, x, by = "Incident.County.Name", all.y = TRUE)
x <- merge(x_hisp, x, by = "Incident.County.Name", all.y = TRUE)
names(x)[1] <- "NAME"
names(x)[2] <- "nOD_hisp"
names(x)[3] <- "nOD_white"
names(x)[4] <- "nOD_black"
names(x)[5] <- "nOD_all"
x[is.na(x)] = 0
overdose_rates <- merge(x, pop_dat, by = "NAME")
overdose_rates$nOD_all_rate <- ((overdose_rates$nOD_all/overdose_rates$ACS_17_5YR)*100000)/2
overdose_rates$nOD_hisp_rate <- ((overdose_rates$nOD_hisp/overdose_rates$pophisp)*100000)/2
overdose_rates$nOD_white_rate <- ((overdose_rates$nOD_white/overdose_rates$popwhite)*100000)/2
overdose_rates$nOD_black_rate <- ((overdose_rates$nOD_black/overdose_rates$popblack)*100000)/2
penn <- readOGR("/Users/gbarboza/Desktop/OD in PA/", "county demographics")
## OGR data source with driver: ESRI Shapefile
## Source: "/Users/gbarboza/Desktop/OD in PA", layer: "county demographics"
## with 67 features
## It has 27 fields
## Integer64 fields read as strings: newAggct popdens urban CL
plot(penn)

overdose_rates <- overdose_rates %>% mutate(urban = factor(urban, levels = c(0, 1), labels = c("Rural", "Urban")))
gd <- overdose_rates %>%
group_by(urban) %>%
summarise(nOD_all_rate = mean(nOD_all_rate))
ggplot(overdose_rates, aes(x = urban, y = nOD_all_rate, color = urban, fill = urban)) +
geom_bar(data = gd, stat = "identity", alpha = .3) +
ggrepel::geom_text_repel(aes(label = overdose_rates$NAME), color = "black", size = 2.5, segment.color = "grey") +
geom_point() +
guides(color = "none", fill = "none") +
theme_bw() +
labs(
title = "Heroin Overdose Response Rate by Urbanicity",
subtitle = "Source: Pennsylvania Overdose Information Network (2018 - 2019)",
x = "Region",
y = "OD RATE"
)

# We create the data foundation from the shapefile
penn@data$id = rownames(penn@data)
penn.points = fortify(penn, region="id")
## SpP is invalid
penn.df = join(penn.points, penn@data, by="id")
# We merge with churn regional data
merged <- merge(penn.df, overdose_rates, by = "NAME")
merged <- merged[order(merged$order), ]
no_classes <- 5
labels <- c()
#all
quantiles <- quantile(merged$nOD_all_rate, probs = seq(0, 1, length.out = no_classes + 1))
for(idx in 1:length(quantiles)){
labels <- c(labels, paste0(round(quantiles[idx], 2),
" – ",
round(quantiles[idx + 1], 2)))
}
labels <- labels[1:length(labels)-1]
merged$OD_all_quantiles <- cut(merged$nOD_all_rate,
breaks = quantiles,
labels = labels,
include.lowest = T)
p <- ggplot() +
geom_polygon(data = merged, aes(fill = OD_all_quantiles,
x = long,
y = lat,
group = group)) +
coord_equal() +
labs(x = NULL,
y = NULL,
title = "Spatial Distribution of Overdose Response Rates",
subtitle = "Pennsylvania Counties",
caption = "Source: Pennsylvania Overdose Information Network (2018-2019)") +
# now the discrete-option is used,
# and we use guide_legend instead of guide_colourbar
scale_fill_viridis(
option = "cividis",
name = "Overdose Response Rate",
discrete = T,
direction = -1,
guide = guide_legend(
keyheight = unit(5, units = "mm"),
title.position = 'top',
reverse = T
))
p

#white
# We create the data foundation from the shapefile
penn@data$id = rownames(penn@data)
penn.points = fortify(penn, region="id")
## SpP is invalid
penn.df = join(penn.points, penn@data, by="id")
# We merge with churn regional data
merged <- merge(penn.df, overdose_rates, by = "NAME")
merged <- merged[order(merged$order), ]
no_classes <- 8
labels <- c()
quantiles <- quantile(merged$nOD_white_rate, probs = seq(0, 1, length.out = no_classes + 1))
for(idx in 1:length(quantiles)){
labels <- c(labels, paste0(round(quantiles[idx], 2),
" – ",
round(quantiles[idx + 1], 2)))
}
labels <- labels[1:length(labels)-1]
merged$OD_white_quantiles <- cut(merged$nOD_white_rate,
breaks = quantiles,
labels = labels,
include.lowest = T)
p <- ggplot() +
geom_polygon(data = merged, aes(fill = OD_white_quantiles,
x = long,
y = lat,
group = group)) +
coord_equal() +
labs(x = NULL,
y = NULL,
title = "Spatial Distribution of Overdose Response Rates [Whites]",
subtitle = "Pennsylvania Counties",
caption = "Source: Pennsylvania Overdose Information Network (2018-2019)") +
# now the discrete-option is used,
# and we use guide_legend instead of guide_colourbar
scale_fill_viridis(
option = "cividis",
name = "Overdose Response Rate [Whites]",
discrete = T,
direction = -1,
guide = guide_legend(
keyheight = unit(5, units = "mm"),
title.position = 'top',
reverse = T
))
p

#black
# We create the data foundation from the shapefile
penn@data$id = rownames(penn@data)
penn.points = fortify(penn, region="id")
## SpP is invalid
penn.df = join(penn.points, penn@data, by="id")
# We merge with churn regional data
merged <- merge(penn.df, overdose_rates, by = "NAME")
merged <- merged[order(merged$order), ]
no_classes <- 10
labels <- c()
quantiles <- unique(quantile(merged$nOD_black_rate, probs = seq(0, 1, length.out = no_classes + 1)))
for(idx in 1:length(quantiles)){
labels <- c(labels, paste0(round(quantiles[idx], 2),
" – ",
round(quantiles[idx + 1], 2)))
}
labels <- labels[1:length(labels)-1]
merged$OD_black_quantiles <- cut(merged$nOD_black_rate,
breaks = quantiles,
labels = labels,
include.lowest = T)
p <- ggplot() +
geom_polygon(data = merged, aes(fill = OD_black_quantiles,
x = long,
y = lat,
group = group)) +
coord_equal() +
labs(x = NULL,
y = NULL,
title = "Spatial Distribution of Overdose Response Rates [Black]",
subtitle = "Pennsylvania Counties",
caption = "Source: Pennsylvania Overdose Information Network (2018-2019)") +
# now the discrete-option is used,
# and we use guide_legend instead of guide_colourbar
scale_fill_viridis(
option = "cividis",
name = "Overdose Response Rate [Black]",
discrete = T,
direction = -1,
guide = guide_legend(
keyheight = unit(5, units = "mm"),
title.position = 'top',
reverse = T
))
p

#hisp
# We create the data foundation from the shapefile
penn@data$id = rownames(penn@data)
penn.points = fortify(penn, region="id")
## SpP is invalid
penn.df = join(penn.points, penn@data, by="id")
# We merge with churn regional data
merged <- merge(penn.df, overdose_rates, by = "NAME")
merged <- merged[order(merged$order), ]
no_classes <- 14
labels <- c()
quantiles <- unique(quantile(merged$nOD_hisp_rate, probs = seq(0, 1, length.out = no_classes + 1)))
for(idx in 1:length(quantiles)){
labels <- c(labels, paste0(round(quantiles[idx], 2),
" – ",
round(quantiles[idx + 1], 2)))
}
labels <- labels[1:length(labels)-1]
merged$OD_hisp_quantiles <- cut(merged$nOD_hisp_rate,
breaks = quantiles,
labels = labels,
include.lowest = T)
p <- ggplot() +
geom_polygon(data = merged, aes(fill = OD_hisp_quantiles,
x = long,
y = lat,
group = group)) +
coord_equal() +
labs(x = NULL,
y = NULL,
title = "Spatial Distribution of Overdose Response Rates [Latinos]",
subtitle = "Pennsylvania Counties",
caption = "Source: Pennsylvania Overdose Information Network (2018-2019)") +
# now the discrete-option is used,
# and we use guide_legend instead of guide_colourbar
scale_fill_viridis(
option = "cividis",
name = "Overdose Response Rate [Latinos]",
discrete = T,
direction = -1,
guide = guide_legend(
keyheight = unit(5, units = "mm"),
title.position = 'top',
reverse = T
))
p
